------------------------------------------------------------------------------------------------------------------------------------
      name:  plog_740
       log:  /accounts/projects/jr_ra/GRscarring/erratum/programs/prepare/collapse_march.log
  log type:  text
 opened on:  27 Nov 2024, 17:13:15

. /****************************************************************************
> Program: collapse_march_v3.do
> Author: Rachel Young
> Date: 4/17/2018
> 
> Description: Program creates the variables listed below from the March CPS
> - Annual earnings (pearnval)
> - Annual personal income (ptotval)
> - Number of weeks worked
> - An indicator for positive weeks worked.
> - Versions of annual earnings and income that 
>         (a) set zeros/negatives to missing, 
>         (b) are adjusted for inflation, and 
>         (c) are logged.
>         
> Source: This program combines the previous prepare_march.do (9/21/2017) and collapse_march.do (3/21/18)
> 
> modified RY 4/25/2018, Revised the collapse so that it has two education groups 
> modified JR 4/30/2018: Use 2, 4, and 5 education groups.
>                        Remove merge to unemployment rate (now in combinecollapse)
> modified RY 5/22/2018: Added new topcoded annual earnings variable (to be used for main analysis)
> modified NR 9/29/2018: Added new weight (marsupwt_retro) when merging `set'_retro
> to contemporary `set' dataset
> modified JR 1/9/2019:  Reduce variables to keep, and generate new weight that is limited
>                        to those with non-missing earnings.
> ****************************************************************************/
. 
. cap project, doinfo

. if _rc==0 {
.    local pdir "`r(pdir)'"                                                    // the project's main dir.
.    local dofile "`r(dofile)'"                                                // do-file's stub name
.    local sig {bind:{hi:[`dofile'.dta. RP : `dofile'.do, `c(current_date)']}} // a signature in notes
.    local doasproject=1
. }

. else {
.     local pdir "~/GRscarring"
.     local dofile "collapse_march"
.     local doasproject=0
. }

. 
. set more off

. local rootdir "`pdir'"

. local thisdir "`pdir'"

. 
. local prepdata "`pdir'/scratch"

. local rawdata "`pdir'/rawdata"

. local output "`pdir'/results"

. 
. 
. if `doasproject'==1 {
.   project, uses(`prepdata'/clean_compile_march.dta.gz)
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/clean_compile_march.dta.gz" filesig(243
> 0876695:937693911)
. }

. 
. 
. *************************************
. ** 0 LOAD STACKED MARCH CPS DATA **    
. *************************************  
. 
. *if `doasproject'==1 project, uses(`prepdata'/clean_compile_march_v1.dta.gz)
. 
. ! zcat `prepdata'/clean_compile_march.dta.gz > `prepdata'/clean_compile_march.dta


. use `prepdata'/clean_compile_march.dta, clear

. ! rm `prepdata'/clean_compile_march.dta


. 
. 
. ************************************************
. *********** 1 SAMPLE RESTRICTIONS *************
. ************************************************
. 
. keep if age>15 & age<81
(1,912,959 observations deleted)

. 
. *******************************************
. ******** 2 CREATE COLLAPSE VARIABLES ******
. *******************************************
. 
. ***** Cohort ****
. gen cohort=(year-age)

. 
. ****** Married *****
. gen married=(a_maritl<4)

. 
. ***** State (fipsst) *****
.  decode state, gen(state_name)

.  drop if state==. /* only 4 observations */
(0 observations deleted)

.  * Merge to population
.  merge m:1 state_name year using `prepdata'/statepop, assert(2 3) keep(3) keepusing(fipsst)
(variable state_name was str20, now str21 to accommodate using data's values)

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                         5,153,610  (_merge==3)
    -----------------------------------------

. 
. ***** Education (educ5) ****
. *  HS completion
. *  Some college
. *  BA+
. *  Yrs educ
. tab a_hga, m

      Highest grade |
           attended |      Freq.     Percent        Cum.
--------------------+-----------------------------------
  Children under 15 |      2,525        0.05        0.05
               None |      7,385        0.14        0.19
     Elementary one |      2,868        0.06        0.25
     Elementary two |      5,298        0.10        0.35
   Elementary three |      9,060        0.18        0.53
    Elementary four |     10,489        0.20        0.73
    Elementary five |     15,873        0.31        1.04
     Elementary six |     23,322        0.45        1.49
   Elementary seven |     33,472        0.65        2.14
   Elementary eight |     78,717        1.53        3.67
    High School one |     63,766        1.24        4.90
    High School two |     94,122        1.83        6.73
  High School three |    244,828        4.75       11.48
   High School four |    477,751        9.27       20.75
        College one |    126,329        2.45       23.20
        College two |    117,726        2.28       25.49
      College three |     92,803        1.80       27.29
       College four |    124,569        2.42       29.71
       College five |     53,879        1.05       30.75
College six or more |     57,019        1.11       31.86
                 31 |     14,025        0.27       32.13
                 32 |     31,956        0.62       32.75
                 33 |     61,813        1.20       33.95
                 34 |     91,153        1.77       35.72
                 35 |    103,761        2.01       37.73
                 36 |    148,786        2.89       40.62
                 37 |    160,001        3.10       43.72
                 38 |     57,918        1.12       44.85
                 39 |  1,051,195       20.40       65.24
                 40 |    646,230       12.54       77.78
                 41 |    140,288        2.72       80.51
                 42 |    142,326        2.76       83.27
                 43 |    568,778       11.04       94.30
                 44 |    209,352        4.06       98.37
                 45 |     44,226        0.86       99.22
                 46 |     40,001        0.78      100.00
--------------------+-----------------------------------
              Total |  5,153,610      100.00

. gen ed_yrs=a_hga if year<=1991
(3,511,809 missing values generated)

. replace ed_yrs=0 if a_hga==31
(14,025 real changes made)

. replace ed_yrs=4 if a_hga==32
(31,956 real changes made)

. replace ed_yrs=6 if a_hga==33
(61,813 real changes made)

. replace ed_yrs=8 if a_hga==34
(91,153 real changes made)

. replace ed_yrs=a_hga-26 if inrange(a_hga, 35,38)
(470,466 real changes made)

. replace ed_yrs=12 if a_hga==39
(1,051,195 real changes made)

. replace ed_yrs=13 if a_hga==40
(646,230 real changes made)

. replace ed_yrs=14 if a_hga==41
(140,288 real changes made)

. replace ed_yrs=14 if a_hga==42
(142,326 real changes made)

. replace ed_yrs=16 if a_hga==43
(568,778 real changes made)

. replace ed_yrs=18 if a_hga==44
(209,352 real changes made)

. replace ed_yrs=19 if a_hga==45
(44,226 real changes made)

. replace ed_yrs=19 if a_hga==46
(40,001 real changes made)

. 
. gen educ5=1 

. replace educ5=2 if ed_hs==1
(1,296,023 real changes made)

. replace educ5=3 if ed_scol==1
(1,650,650 real changes made)

. replace educ5=4 if ed_ba==1
(661,581 real changes made)

. replace educ5=5 if ed_grad==1
(529,046 real changes made)

. label define attain_l 1 "LTHS" 2 "HS" 3 "Some col." 4 "BA" 5 "MA+"

. label values educ5 attain_l

. 
. *gen byte ed_scol_less=(educ5==1 | educ5==2 | educ5==3) if educ5<.
. *gen byte ed_ba_more=(educ5==4 | educ5==5) if educ5<.
. gen byte educ2=(inlist(educ5, 4, 5)) if educ5<.

. recode educ5 (1=1) (2=2) (3=3) (4 5=4), gen(educ4)
(529,046 differences between educ5 and educ4)

. 
. ******** Sex ******
. gen sex = 2

. replace sex = 1 if a_sex == 1
(2,457,528 real changes made)

. replace sex = 0 if a_sex == 2
(2,696,082 real changes made)

. tab sex

        sex |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |  2,696,082       52.31       52.31
          1 |  2,457,528       47.69      100.00
------------+-----------------------------------
      Total |  5,153,610      100.00

. 
. ***********************************************
. ******** 3 CREATE VARIABLES FOR ANALYSIS ******
. ***********************************************
. 
. ***** Annual Earnings ******
. 
. * Annual earnings (ann_ern)
. gen earn = pearnval

. 
. * Inflation adjusted annual earnings (ann_ern_r)
. gen earn_r = pearnval_r

. 
. * Setting 0's and negatives to missing (pearnval and pearnval_r)
.  gen posearn=(pearnval>0)

.  gen earn_pos=pearnval

.  replace earn_pos=. if posearn==0
(1,572,139 real changes made, 1,572,139 to missing)

.  
.  gen posearn_r=(pearnval_r>0)

.  gen earn_r_pos=pearnval_r

.  replace earn_r_pos=. if posearn_r==0
(1,572,139 real changes made, 1,572,139 to missing)

.  
.   //Another version deals with topcoding -- censor at double the 90th percentile. (The 99th percentile 
.  //  ranges from 2* to 5* the 90th percentile over time, with some of that variation apparently
.  //  due to topcoding, not reality.)
.  // The topcode dummy variable is tcernval
.  levelsof year
1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 20
> 05 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018

.  gen earn_r_pos_tc=earn_r

.  foreach y of numlist `r(levels)' {
  2.    di "Real personal earnings distribution, `y'"
  3.    qui su earn_r_pos if earn_r_pos>0 & year==`y' [aw=marsupwt], d
  4.    local p90=r(p90)
  5.    local p99=r(p99)
  6.    di "Ratio of 99th percentile to 90th percentile in `y' is " `p99'/`p90'
  7.    replace earn_r_pos_tc=2*`p90' if year==`y' & earn_r_pos>2*`p90' & earn_r_pos<.
  8.  }
Real personal earnings distribution, 1979
Ratio of 99th percentile to 90th percentile in 1979 is 2.2727273
(1,888 real changes made)
Real personal earnings distribution, 1980
Ratio of 99th percentile to 90th percentile in 1980 is 2.0833333
(1,139 real changes made)
Real personal earnings distribution, 1981
Ratio of 99th percentile to 90th percentile in 1981 is 2
(104 real changes made)
Real personal earnings distribution, 1982
Ratio of 99th percentile to 90th percentile in 1982 is 2.1428571
(851 real changes made)
Real personal earnings distribution, 1983
Ratio of 99th percentile to 90th percentile in 1983 is 2.22
(889 real changes made)
Real personal earnings distribution, 1984
Ratio of 99th percentile to 90th percentile in 1984 is 2.2580645
(976 real changes made)
Real personal earnings distribution, 1985
Ratio of 99th percentile to 90th percentile in 1985 is 2.2571601
(1,026 real changes made)
Real personal earnings distribution, 1986
Ratio of 99th percentile to 90th percentile in 1986 is 2.1927714
(1,019 real changes made)
Real personal earnings distribution, 1987
Ratio of 99th percentile to 90th percentile in 1987 is 2.195122
(1,150 real changes made)
Real personal earnings distribution, 1988
Ratio of 99th percentile to 90th percentile in 1988 is 2.1794872
(967 real changes made)
Real personal earnings distribution, 1989
Ratio of 99th percentile to 90th percentile in 1989 is 2.375
(951 real changes made)
Real personal earnings distribution, 1990
Ratio of 99th percentile to 90th percentile in 1990 is 2.3809286
(1,275 real changes made)
Real personal earnings distribution, 1991
Ratio of 99th percentile to 90th percentile in 1991 is 2.2727045
(1,128 real changes made)
Real personal earnings distribution, 1992
Ratio of 99th percentile to 90th percentile in 1992 is 2.2222
(1,062 real changes made)
Real personal earnings distribution, 1993
Ratio of 99th percentile to 90th percentile in 1993 is 2.1367308
(1,118 real changes made)
Real personal earnings distribution, 1994
Ratio of 99th percentile to 90th percentile in 1994 is 2.0407959
(1,267 real changes made)
Real personal earnings distribution, 1995
Ratio of 99th percentile to 90th percentile in 1995 is 1.99998
(259 real changes made)
Real personal earnings distribution, 1996
Ratio of 99th percentile to 90th percentile in 1996 is 2.6364078
(1,049 real changes made)
Real personal earnings distribution, 1997
Ratio of 99th percentile to 90th percentile in 1997 is 2.7407407
(1,083 real changes made)
Real personal earnings distribution, 1998
Ratio of 99th percentile to 90th percentile in 1998 is 2.6785714
(1,231 real changes made)
Real personal earnings distribution, 1999
Ratio of 99th percentile to 90th percentile in 1999 is 2.5
(1,072 real changes made)
Real personal earnings distribution, 2000
Ratio of 99th percentile to 90th percentile in 2000 is 3.2989838
(1,288 real changes made)
Real personal earnings distribution, 2001
Ratio of 99th percentile to 90th percentile in 2001 is 4.7219385
(1,183 real changes made)
Real personal earnings distribution, 2002
Ratio of 99th percentile to 90th percentile in 2002 is 4.5816857
(2,018 real changes made)
Real personal earnings distribution, 2003
Ratio of 99th percentile to 90th percentile in 2003 is 2.8571429
(2,125 real changes made)
Real personal earnings distribution, 2004
Ratio of 99th percentile to 90th percentile in 2004 is 2.7777778
(2,228 real changes made)
Real personal earnings distribution, 2005
Ratio of 99th percentile to 90th percentile in 2005 is 2.7027027
(2,152 real changes made)
Real personal earnings distribution, 2006
Ratio of 99th percentile to 90th percentile in 2006 is 2.8114533
(1,941 real changes made)
Real personal earnings distribution, 2007
Ratio of 99th percentile to 90th percentile in 2007 is 2.849675
(1,864 real changes made)
Real personal earnings distribution, 2008
Ratio of 99th percentile to 90th percentile in 2008 is 2.5
(1,796 real changes made)
Real personal earnings distribution, 2009
Ratio of 99th percentile to 90th percentile in 2009 is 4.7224121
(2,019 real changes made)
Real personal earnings distribution, 2010
Ratio of 99th percentile to 90th percentile in 2010 is 4.5535714
(1,873 real changes made)
Real personal earnings distribution, 2011
Ratio of 99th percentile to 90th percentile in 2011 is 2.6470588
(1,790 real changes made)
Real personal earnings distribution, 2012
Ratio of 99th percentile to 90th percentile in 2012 is 2.7777778
(1,715 real changes made)
Real personal earnings distribution, 2013
Ratio of 99th percentile to 90th percentile in 2013 is 2.7777778
(1,864 real changes made)
Real personal earnings distribution, 2014
Ratio of 99th percentile to 90th percentile in 2014 is 2.7777778
(1,241 real changes made)
Real personal earnings distribution, 2015
Ratio of 99th percentile to 90th percentile in 2015 is 2.6315789
(1,867 real changes made)
Real personal earnings distribution, 2016
Ratio of 99th percentile to 90th percentile in 2016 is 2.62001
(1,577 real changes made)
Real personal earnings distribution, 2017
Ratio of 99th percentile to 90th percentile in 2017 is 2.87082
(1,754 real changes made)
Real personal earnings distribution, 2018
Ratio of 99th percentile to 90th percentile in 2018 is 2.9411765
(1,715 real changes made)

.  
. * Annual earnings logged (log_ann_ern_r)
.  gen log_earn_pos=ln(earn_pos)
(1,572,139 missing values generated)

.  gen log_earn_r_pos=ln(earn_r_pos)
(1,572,139 missing values generated)

.  gen log_earn_r_pos_tc=ln(earn_r_pos_tc)
(1,572,139 missing values generated)

.  gen log_pearnval_tc_r=ln(pearnval_tc_r)
(1,572,139 missing values generated)

.  
. local ernvars "log_pearnval_tc_r pearnval_tc_r earn earn_r earn_pos earn_r_pos earn_r_pos_tc log_earn_pos log_earn_r_pos log_earn_
> r_pos_tc tcernval posearn"

. 
. ***** Annual Income *******
. 
. * Annual Income (ann_inc)
.  gen inc=ptotval

. 
. * Inflation adjusted annual income (ann_inc_r)
.  gen inc_r=ptotval_r                                            

. 
. * Setting 0's and negatives to missing (inc_pos and inc_r_pos)
.  gen posinc=(ptotval>0)

.  gen inc_pos=inc

.  replace inc_pos=. if posinc==0 
(486,144 real changes made, 486,144 to missing)

.  
.  gen posinc_r=(ptotval_r>0)

.  gen inc_r_pos=inc_r

.  replace inc_r_pos=. if posinc_r==0
(486,144 real changes made, 486,144 to missing)

.  
. * Top coding
. //Similar to the earnings variable, the 99th percentile ranges from 2* to ~5* the 90th percentile. 
. //  Topcoded observations are identified with the tcwsval variable.
. levelsof year
1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 20
> 05 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018

. gen inc_r_pos_tc=inc_r_pos
(486,144 missing values generated)

.  foreach y of numlist `r(levels)' {
  2.    di "Real personal earnings distribution, `y'"
  3.    qui su inc_r_pos if inc_r_pos>0 & year==`y' [aw=marsupwt], d
  4.    local p90=r(p90)
  5.    local p99=r(p99)
  6.    di "Ratio of 99th percentile to 90th percentile in `y' is " `p99'/`p90'
  7.    replace inc_r_pos_tc=2*`p90' if year==`y' & inc_r_pos>2*`p90' & inc_r_pos<.
  8.  }
Real personal earnings distribution, 1979
Ratio of 99th percentile to 90th percentile in 1979 is 2.3134286
(2,640 real changes made)
Real personal earnings distribution, 1980
Ratio of 99th percentile to 90th percentile in 1980 is 2.1975851
(1,595 real changes made)
Real personal earnings distribution, 1981
Ratio of 99th percentile to 90th percentile in 1981 is 2.0221328
(1,604 real changes made)
Real personal earnings distribution, 1982
Ratio of 99th percentile to 90th percentile in 1982 is 2.1756504
(1,292 real changes made)
Real personal earnings distribution, 1983
Ratio of 99th percentile to 90th percentile in 1983 is 2.2992958
(1,487 real changes made)
Real personal earnings distribution, 1984
Ratio of 99th percentile to 90th percentile in 1984 is 2.2547491
(1,472 real changes made)
Real personal earnings distribution, 1985
Ratio of 99th percentile to 90th percentile in 1985 is 2.3412426
(1,526 real changes made)
Real personal earnings distribution, 1986
Ratio of 99th percentile to 90th percentile in 1986 is 2.2607026
(1,473 real changes made)
Real personal earnings distribution, 1987
Ratio of 99th percentile to 90th percentile in 1987 is 2.3197183
(1,582 real changes made)
Real personal earnings distribution, 1988
Ratio of 99th percentile to 90th percentile in 1988 is 2.3221237
(1,579 real changes made)
Real personal earnings distribution, 1989
Ratio of 99th percentile to 90th percentile in 1989 is 2.407765
(1,458 real changes made)
Real personal earnings distribution, 1990
Ratio of 99th percentile to 90th percentile in 1990 is 2.4199758
(1,756 real changes made)
Real personal earnings distribution, 1991
Ratio of 99th percentile to 90th percentile in 1991 is 2.3568151
(1,645 real changes made)
Real personal earnings distribution, 1992
Ratio of 99th percentile to 90th percentile in 1992 is 2.2838941
(1,609 real changes made)
Real personal earnings distribution, 1993
Ratio of 99th percentile to 90th percentile in 1993 is 2.2335698
(1,651 real changes made)
Real personal earnings distribution, 1994
Ratio of 99th percentile to 90th percentile in 1994 is 2.161
(1,714 real changes made)
Real personal earnings distribution, 1995
Ratio of 99th percentile to 90th percentile in 1995 is 2.0672745
(1,878 real changes made)
Real personal earnings distribution, 1996
Ratio of 99th percentile to 90th percentile in 1996 is 2.6283686
(1,524 real changes made)
Real personal earnings distribution, 1997
Ratio of 99th percentile to 90th percentile in 1997 is 2.6516762
(1,583 real changes made)
Real personal earnings distribution, 1998
Ratio of 99th percentile to 90th percentile in 1998 is 2.6893571
(1,691 real changes made)
Real personal earnings distribution, 1999
Ratio of 99th percentile to 90th percentile in 1999 is 2.5168833
(1,493 real changes made)
Real personal earnings distribution, 2000
Ratio of 99th percentile to 90th percentile in 2000 is 2.9167143
(1,614 real changes made)
Real personal earnings distribution, 2001
Ratio of 99th percentile to 90th percentile in 2001 is 3.1863099
(1,497 real changes made)
Real personal earnings distribution, 2002
Ratio of 99th percentile to 90th percentile in 2002 is 4.6742814
(2,695 real changes made)
Real personal earnings distribution, 2003
Ratio of 99th percentile to 90th percentile in 2003 is 2.6886539
(2,736 real changes made)
Real personal earnings distribution, 2004
Ratio of 99th percentile to 90th percentile in 2004 is 2.6147143
(2,711 real changes made)
Real personal earnings distribution, 2005
Ratio of 99th percentile to 90th percentile in 2005 is 2.5676379
(2,595 real changes made)
Real personal earnings distribution, 2006
Ratio of 99th percentile to 90th percentile in 2006 is 2.67228
(2,723 real changes made)
Real personal earnings distribution, 2007
Ratio of 99th percentile to 90th percentile in 2007 is 2.5888097
(2,463 real changes made)
Real personal earnings distribution, 2008
Ratio of 99th percentile to 90th percentile in 2008 is 2.5030432
(2,283 real changes made)
Real personal earnings distribution, 2009
Ratio of 99th percentile to 90th percentile in 2009 is 2.6558516
(2,522 real changes made)
Real personal earnings distribution, 2010
Ratio of 99th percentile to 90th percentile in 2010 is 2.6234603
(2,426 real changes made)
Real personal earnings distribution, 2011
Ratio of 99th percentile to 90th percentile in 2011 is 2.5339506
(2,401 real changes made)
Real personal earnings distribution, 2012
Ratio of 99th percentile to 90th percentile in 2012 is 2.6603068
(2,541 real changes made)
Real personal earnings distribution, 2013
Ratio of 99th percentile to 90th percentile in 2013 is 2.6148205
(2,591 real changes made)
Real personal earnings distribution, 2014
Ratio of 99th percentile to 90th percentile in 2014 is 2.6390746
(1,754 real changes made)
Real personal earnings distribution, 2015
Ratio of 99th percentile to 90th percentile in 2015 is 2.6926005
(2,629 real changes made)
Real personal earnings distribution, 2016
Ratio of 99th percentile to 90th percentile in 2016 is 2.633163
(2,382 real changes made)
Real personal earnings distribution, 2017
Ratio of 99th percentile to 90th percentile in 2017 is 2.6519
(2,457 real changes made)
Real personal earnings distribution, 2018
Ratio of 99th percentile to 90th percentile in 2018 is 2.6433923
(2,379 real changes made)

. 
. 
. * Annual income logged
. gen log_inc_pos = ln(inc_pos)
(486,144 missing values generated)

. gen log_inc_r_pos = ln(inc_r_pos)
(486,144 missing values generated)

. gen log_inc_r_pos_tc = ln(inc_r_pos_tc)
(486,144 missing values generated)

. 
. local incvars "inc inc_r inc_pos inc_r_pos inc_r_pos_tc log_inc_pos log_inc_r_pos log_inc_r_pos_tc tcwsval posinc"

. 
. ****** Weeks worked ******
. 
. *Number of weeks worked (wkswork)
. 
. *Indicator for positive weeks worked (wkswork_pos)
. gen wkswork_pos=(wkswork>0)

. 
. local workvars "wkswork wkswork_pos"

. 
. gen marsupwt_log_pearnval_tc_r=marsupwt if log_pearnval_tc_r<.
(1,572,139 missing values generated)

. local collapsevars "log_pearnval_tc_r"

. local weightvars "marsupwt marsupwt_log_pearnval_tc_r"

. 
. *******************
. ****** SAVE *******
. *******************
. sort year cohort fipsst educ5

. tempfile all 

. save `all'
file /tmp/St2868349.000004 saved as .dta format

. 
. 
. *************************************
. *********** 4 COLLAPSE *************
. ************************************
. 
. ** 1.1: Year-cohort
. collapse (mean) sex married ed_hs ed_scol ed_ba ed_grad ed_yrs `ernvars' `incvars' `workvars' ///
>          (count) n_obs=marsupwt (rawsum) `weightvars' [aw=marsupwt], by(year cohort)
(note: aweights not used to compute counts)

. tempfile yc

. save `yc'
file /tmp/St2868349.000005 saved as .dta format

. 
. ** 1.2: Year-cohort-state
. use `all', clear

. collapse (mean) married ed_hs ed_scol ed_ba ed_grad ed_yrs `ernvars' `incvars' `workvars' ///
>          (count) n_obs=marsupwt (rawsum) `weightvars' [aw=marsupwt], by(year cohort fipsst)
(note: aweights not used to compute counts)

. tempfile ycs

. save `ycs'
file /tmp/St2868349.000006 saved as .dta format

. 
. ** 1.3: Year-cohort-attainment (5 category)
. use `all', clear

. collapse (mean) married `ernvars' `incvars' `workvars' ///
>          (count) n_obs=marsupwt (rawsum) `weightvars' [aw=marsupwt], by(year cohort educ5)
(note: aweights not used to compute counts)

. tempfile yca5

. save `yca5'
file /tmp/St2868349.000007 saved as .dta format

. 
. ** 1.4: Year-cohort-attainment (5 category)-state
. use `all', clear

. collapse (mean) married `ernvars' `incvars' `workvars' ///
>          (count) n_obs=marsupwt (rawsum) `weightvars' [aw=marsupwt], by(year cohort fipsst educ5)
(note: aweights not used to compute counts)

. tempfile yca5s

. save `yca5s'
file /tmp/St2868349.000008 saved as .dta format

. 
. ** 1.5: Year-cohort-attainment (4 category)
. use `all', clear

. collapse (mean) married `ernvars' `incvars' `workvars' ///
>          (count) n_obs=marsupwt (rawsum) `weightvars' [aw=marsupwt], by(year cohort educ4)
(note: aweights not used to compute counts)

. tempfile yca4

. save `yca4'
file /tmp/St2868349.000009 saved as .dta format

. 
. ** 1.6: Year-cohort-attainment (4 category)-state
. use `all', clear

. collapse (mean) married `ernvars' `incvars' `workvars' ///
>          (count) n_obs=marsupwt (rawsum) `weightvars' [aw=marsupwt], by(year cohort fipsst educ4)
(note: aweights not used to compute counts)

. tempfile yca4s

. save `yca4s'
file /tmp/St2868349.00000a saved as .dta format

. 
. ** 1.7: Year-cohort-attainment (2 category)
. use `all', clear

. collapse (mean) married `ernvars' `incvars' `workvars' ///
>          (count) n_obs=marsupwt (rawsum) `weightvars' [aw=marsupwt], by(year cohort educ2)
(note: aweights not used to compute counts)

. tempfile yca2

. save `yca2'
file /tmp/St2868349.00000b saved as .dta format

. 
. ** 1.8: Year-cohort-attainment (2 category)-state
. use `all', clear

. collapse (mean) married `ernvars' `incvars' `workvars' ///
>          (count) n_obs=marsupwt (rawsum) `weightvars' [aw=marsupwt], by(year cohort fipsst educ2)
(note: aweights not used to compute counts)

. tempfile yca2s

. save `yca2s'
file /tmp/St2868349.00000c saved as .dta format

. 
. ** 1.9: Year-cohort-attainment (2 category)-sex-state
. use `all', clear

. collapse (mean) married `ernvars' `incvars' `workvars' ///
>          (count) n_obs=marsupwt (rawsum) `weightvars' [aw=marsupwt], by(year cohort fipsst educ2 sex)
(note: aweights not used to compute counts)

. tempfile yca2ss

. save `yca2ss'
file /tmp/St2868349.00000d saved as .dta format

. 
. /*
> ********************************************
> **** 5  MERGE POPULATION TIME SERIES *****
> ********************************************
>  // Make national version of population dataset
>   use `prepdata'/statepop
>   collapse (sum) pop, by(year)
>   tempfile natlpop
>   save `natlpop'
>  // Make annual versions of unemployment rates
>   use `prepdata'/unrate_national
>   isid yearmo
>   sort yearmo
>   gen year=yofd(dofm(yearmo))
>   bys year (yearmo): keep if _n==_N
>   keep year ur_nat_annual ur_nat_3yr_avg
>   tempfile natlur
>   save `natlur'
>   use `prepdata'/unrate_state
>   isid fipsst yearmo
>   sort fipsst yearmo
>   gen year=yofd(dofm(yearmo))
>   bys fipsst year (yearmo): keep if _n==_N
>   keep fipsst year ur_st_annual ur_st_3yr_avg
>   tempfile stateur
>   save `stateur'
>   exit
>   
>  // Merge to population
>   foreach set in yc yca {
>     use ``set''
>     merge m:1 year using `natlpop', assert(2 3) keep(3) nogen
>     save ``set'', replace
>   }
>   foreach set in ycs ycas {
>     use ``set''
>     merge m:1 fipsst year using `prepdata'/statepop, assert(2 3) keep(3) nogen
>     save ``set'', replace
>   }
> 
>  // Merge to national unemployment rate
>   foreach set in yc ycs yca ycas {
>     use ``set''
>     merge m:1 year using `natlur', assert(2 3) keep(3) nogen
>     label var ur_nat_annual "Unemployment rate (national)"
>     rename ur_nat_annual ur_nat
>     // Now merge on the unemployment rate at age 22
>     rename year origyear
>     gen year=cohort + 22
>     merge m:1 year using `natlur', keep(1 3) nogen
>     label var ur_nat_annual "UR (natl) at age 22"
>     rename ur_nat_annual ur0_nat
>     drop year
>     rename origyear year
>     save ``set'', replace
>   }
>  // Merge to state unemployment rate
>   foreach set in ycs ycas {
>     use ``set''
>     merge m:1 fipsst year using `stateur', assert(2 3) keep(3) nogen
>     label var ur_st_annual "Unemployment rate (state)"
>     rename ur_st_annual ur_st
>     // Now merge on the unemployment rate at age 22
>       rename year origyear
>       gen year=cohort + 22
>       merge m:1 fipsst year using `stateur', keep(1 3) nogen
>       label var ur_st_annual "UR (state) at age 22"
>       rename ur_st_annual ur0_st
>       drop year
>       rename origyear year
>     save ``set'', replace
>   }
> */
. 
. ********************************************
. **** 6  LABEL VARIABLES *****
. ********************************************
.   foreach set in yc ycs yca5 yca5s yca4 yca4s yca2 yca2s yca2ss {
  2.     use ``set''
  3.     label var n_obs "Number of observations in cell"
  4.     label var marsupwt "Sum of weights (unweighted/raw)"                                                        
  5.     label var married "Married"
  6.     cap label var ed_hs "Educ: HS grad"
  7.     cap label var ed_scol "Educ: Some coll"
  8.     cap label var ed_ba "Educ: Bach degree"
  9.     cap label var ed_grad "Educ: >Bach degree"*/
 10.     cap label var ed_yrs "Educ: years"
 11.         
.     label var log_pearnval_tc_r "ln(Annual earnings), for analysis"
 12.     label var marsupwt_log_pearnval_tc_r "Sum of weights (log_pearnval_tc_r<.)"                                                
>  
 13. 
.     
.     label var pearnval_tc_r "Annual earnings, for analysis"
 14.     label var earn "Annual earnings"
 15.     label var earn_r "Annual earnings (2015$)"
 16.     label var earn_pos "Annual earnings | ern>0"
 17.     label var earn_r_pos "Annual earnings | ern>0 (2015$)"
 18.     label var earn_r_pos_tc "Top coded, annual earnings | ern>0,  (2015$)"
 19.     label var log_earn_pos "Ln(annual earnings | ern>0)"
 20.     label var log_earn_r_pos "Ln(annual earnings | ern>0) (2015$)"
 21.     label var log_earn_r_pos_tc "Ln(Top coded annual earnings | ern>0) (2015$)"
 22.     label var inc "Annual income "
 23.     label var inc_r "Annual income (2015$)"
 24.     label var inc_pos "Annual income | inc>0"   
 25.     label var inc_r_pos "Annual income | inc>0 (2015$)"
 26.     label var inc_r_pos_tc "Top coded, annual income | inc>0,  (2015$)"
 27.     label var log_inc_pos "Ln(annual income) | inc>0"
 28.     label var log_inc_r_pos "Ln(annual income) | inc>0, (2015$)"
 29.     label var log_inc_r_pos_tc "Ln(Top coded annual income) | inc>0, (2015$)"
 30.         
.     label var wkswork "Weeks worked"
 31.     label var wkswork_pos "Weeks worked | weeks>0"
 32.     
. 
.     save ``set'', replace
 33.   }
file /tmp/St2868349.000005 saved as .dta format
file /tmp/St2868349.000006 saved as .dta format
file /tmp/St2868349.000007 saved as .dta format
file /tmp/St2868349.000008 saved as .dta format
file /tmp/St2868349.000009 saved as .dta format
file /tmp/St2868349.00000a saved as .dta format
file /tmp/St2868349.00000b saved as .dta format
file /tmp/St2868349.00000c saved as .dta format
file /tmp/St2868349.00000d saved as .dta format

. 
. *****************************************************
. **** 7  ADJUST YEAR FOR RETROSPECTIVE VARIABLES *****
. *****************************************************
. foreach set in yc ycs yca5 yca5s yca4 yca4s yca2 yca2s yca2ss {
  2.   if "`set'"=="yc" local id "cohort"
  3.   if "`set'"=="ycs" local id "cohort fipsst"
  4.   if "`set'"=="yca5" local id "cohort educ5"
  5.   if "`set'"=="yca5s" local id "cohort educ5 fipsst"
  6.   if "`set'"=="yca4" local id "cohort educ4"
  7.   if "`set'"=="yca4s" local id "cohort educ4 fipsst"
  8.   if "`set'"=="yca2" local id "cohort educ2"
  9.   if "`set'"=="yca2s" local id "cohort educ2 fipsst"
 10.   if "`set'"=="yca2ss" local id "cohort sex educ2 fipsst"
 11.   
.   use ``set'', clear
 12.   tempfile `set'_contemp `set'_retro
 13.   //keep year `id' employed_ly ann_* log_* wkswork* hrswk_ly* 
.   //keep year `id' `collapsevars' `weightvars'
.   //rename marsupwt marsupwt_retro
.   isid year `id'
 14.   sort `id' year
 15.   replace year=year-1
 16.   /* 
>   save `set'_retro, replace
>   use ``set'', clear
>   //drop employed_ly ann_* log_* wkswork* hrswk_ly* 
>   drop `incvars' `ernvars' 
>   merge 1:1 year `id' using `set'_retro, nogen 
> 
>   //replace marsupwt with marsupwt_retro
>   //if marsupwt == .
>   replace marsupwt = marsupwt_retro if missing(marsupwt)
>    */
.   save ``set'', replace
 17. }
(2,600 real changes made)
file /tmp/St2868349.000005 saved as .dta format
(132,574 real changes made)
file /tmp/St2868349.000006 saved as .dta format
(12,849 real changes made)
file /tmp/St2868349.000007 saved as .dta format
(570,894 real changes made)
file /tmp/St2868349.000008 saved as .dta format
(10,353 real changes made)
file /tmp/St2868349.000009 saved as .dta format
(487,425 real changes made)
file /tmp/St2868349.00000a saved as .dta format
(5,154 real changes made)
file /tmp/St2868349.00000b saved as .dta format
(248,671 real changes made)
file /tmp/St2868349.00000c saved as .dta format
(474,471 real changes made)
file /tmp/St2868349.00000d saved as .dta format

.    
. *************************************
. ****** 8 COMPRESS AND SAVE *********
. *************************************
. foreach col in yc ycs yca5 yca5s yca4 yca4s yca2 yca2s yca2ss {
  2.   use ``col'', clear
  3.   save "`prepdata'/`dofile'_`col'.dta", replace
  4.   *! gzip -f `prepdata'/`dofile'_`col'.dta
.   *project, creates("`prepdata'/`dofile'_`col'.dta.gz")
.   if `doasproject'==1 project, creates("`prepdata'/`dofile'_`col'.dta")
  5. }
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yc.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yc.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_march_yc.dta" filesig(34104
> 0703:752821)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_ycs.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_ycs.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_march_ycs.dta" filesig(1022
> 75297:35958453)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yca5.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yca5.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_march_yca5.dta" filesig(392
> 2340501:3085103)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yca5s.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yca5s.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_march_yca5s.dta" filesig(70
> 9728107:136472225)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yca4.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yca4.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_march_yca4.dta" filesig(106
> 9742586:2490835)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yca4s.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yca4s.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_march_yca4s.dta" filesig(67
> 9776490:116522914)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yca2.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yca2.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_march_yca2.dta" filesig(246
> 6862194:1217395)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yca2s.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yca2s.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_march_yca2s.dta" filesig(79
> 8464118:57720011)
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yca2ss.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/collapse_march_yca2ss.dta saved
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/collapse_march_yca2ss.dta" filesig(9
> 71677936:113902019)

. 
. 
. * end of do file *
. 
end of do-file
      name:  plog_740
       log:  /accounts/projects/jr_ra/GRscarring/erratum/programs/prepare/collapse_march.log
  log type:  text
 closed on:  27 Nov 2024, 17:23:40
------------------------------------------------------------------------------------------------------------------------------------
